Detailed Survival analyis of the Survival lung data.

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

Libraries

data(lung)
## Warning in data(lung): data set 'lung' not found
lung$inst <- NULL
lung$status <- lung$status - 1
lung <- lung[complete.cases(lung),]

pander::pander(table(lung$status))
0 1
47 121
pander::pander(summary(lung$time))
Min. 1st Qu. Median Mean 3rd Qu. Max.
5 175 268 310 416 1022

Exploring Raw Features with RRPlot

convar <- colnames(lung)[lapply(apply(lung,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(lung[,c("status",convar)],"status")
pander::pander(topvar)
age wt.loss
0.106 0.106
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(lung$status,lung[,topf]),
                              atProb=c(0.90),
                  timetoEvent=lung$time,
                  title=topf,
#                  plotRR=FALSE
                  )
  idx <- idx + 1
}

names(RRanalysis) <- topFive

Reporting the Metrics

ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(RRatios) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.588 0.491 0.686
wt.loss 0.552 0.453 0.650
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.558 0.558 0.499 0.62
wt.loss 0.508 0.508 0.447 0.57
pander::pander(RRatios)
  est lower upper
age 1.016 0.748 1.38
wt.loss 0.785 0.462 1.33
pander::pander(LogRangp)
age 0.513
wt.loss 0.358
pander::pander(Sensitivity)
  est lower upper
age 0.0992 0.0523 0.167
wt.loss 0.0496 0.0184 0.105
pander::pander(Specificity)
  est lower upper
age 0.894 0.769 0.965
wt.loss 0.894 0.769 0.965
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1],RRatios[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe","RR")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe RR
age 0.588 0.558 0.0992 0.894 1.016
wt.loss 0.552 0.508 0.0496 0.894 0.785

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=lung,NumberofRepeats = 10)

[++++++++++++++++++++++++++++++]…

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
ph.ecog 4.32e-01 1.194 1.541 1.988 0.679 0.649
sex -4.59e-01 0.456 0.632 0.876 0.649 0.679
pat.karno -1.77e-03 0.997 0.998 1.000 0.506 0.720
ph.karno -5.80e-07 1.000 1.000 1.000 0.577 0.720
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
ph.ecog 0.601 0.601 0.620 0.600 0.0449 0.405
sex 0.601 0.620 0.601 0.600 0.0285 0.478
pat.karno 0.506 0.585 0.500 0.585 0.0292 0.342
ph.karno 0.577 0.570 0.500 0.570 0.0143 0.280
  z.IDI z.NRI Delta.AUC Frequency
ph.ecog 3.33 2.48 -0.02005 1
sex 2.76 2.85 -0.00167 1
pat.karno 2.44 2.24 0.08546 1
ph.karno 2.22 1.64 0.06998 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

timeinterval <- 2*mean(subset(lung,status==1)$time)

h0 <- sum(lung$status & lung$time <= timeinterval)
h0 <- h0/sum((lung$time > timeinterval) | (lung$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.85 578
index <- predict(ml,lung)

rdata <- cbind(lung$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=lung$time,
                     title="Raw Train: Lung Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.649 0.475 0.339 0.339 0.493
RR 1.280 1.789 60.500 60.500 1.312
SEN 0.306 0.843 1.000 1.000 0.636
SPE 0.872 0.489 0.170 0.170 0.596
BACC 0.589 0.666 0.585 0.585 0.616
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.65 1.37 1.97 3.16e-07
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.23 1.23 1.19 1.27
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.21 1.21 1.2 1.22
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.652 0.653 0.59 0.71
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.691 0.598 0.784
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.306 0.225 0.396
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.872 0.743 0.952
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.649
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.28 1.08 1.52
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 8.009591 on 1 degrees of freedom, p = 0.004653
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 125 84 96.5 1.61 8.01
class=1 43 37 24.5 6.34 8.01

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,lung,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
1.29 1.52 749

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(lung$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=lung$time,
                     title="Train: Lung",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.796 0.624 0.467 0.467 0.479
RR 1.280 1.789 60.500 60.500 2.784
SEN 0.306 0.843 1.000 1.000 0.959
SPE 0.872 0.489 0.170 0.170 0.277
BACC 0.589 0.666 0.585 0.585 0.618
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 0.000124
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.02 1.1
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.01 1.01 1 1.01
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.652 0.652 0.592 0.712
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.691 0.598 0.784
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.306 0.225 0.396
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.872 0.743 0.952
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.796
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.28 1.08 1.52
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 8.009591 on 1 degrees of freedom, p = 0.004653
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 125 84 96.5 1.61 8.01
class=1 43 37 24.5 6.34 8.01

Cross-Validation

rcv <- randomCV(theData=lung,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.95,
                repetitions=200,
                classSamplingType = "Pro"
         )

.[++].[++].[+++].[++].[+++].[+++].[+++].[+++].[++].[+++]10 Tested: 69 Avg. Selected: 3.6 Min Tests: 1 Max Tests: 5 Mean Tests: 1.449275 . MAD: 0.4785205

.[+++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[++]20 Tested: 110 Avg. Selected: 3.75 Min Tests: 1 Max Tests: 7 Mean Tests: 1.818182 . MAD: 0.4732616

.[++].[+++].[+++].[++++].[+++].[+].[++].[+++].[+++].[++++]30 Tested: 141 Avg. Selected: 3.766667 Min Tests: 1 Max Tests: 8 Mean Tests: 2.12766 . MAD: 0.4730391

.[++].[+++].[++++].[++-].[++++].[+++].[+++].[+++].[+++].[+++]40 Tested: 151 Avg. Selected: 3.825 Min Tests: 1 Max Tests: 10 Mean Tests: 2.649007 . MAD: 0.4718192

.[+++].[++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[++]50 Tested: 159 Avg. Selected: 3.82 Min Tests: 1 Max Tests: 10 Mean Tests: 3.144654 . MAD: 0.4751851

.[++-].[+++].[++-].[++].[+++].[+++].[+++].[+++].[++].[+++]60 Tested: 160 Avg. Selected: 3.783333 Min Tests: 1 Max Tests: 10 Mean Tests: 3.75 . MAD: 0.4739128

.[+++].[+++].[+++].[+].[+].[+++].[++].[+++].[+++].[+++]70 Tested: 164 Avg. Selected: 3.742857 Min Tests: 1 Max Tests: 10 Mean Tests: 4.268293 . MAD: 0.4742704

.[++].[+++].[+++].[+++].[+++].[++].[++].[++].[++].[+++]80 Tested: 168 Avg. Selected: 3.7125 Min Tests: 1 Max Tests: 10 Mean Tests: 4.761905 . MAD: 0.4752243

.[+++].[+++].[+++].[+++].[++].[+++].[++].[++].[+++].[++++]90 Tested: 168 Avg. Selected: 3.722222 Min Tests: 1 Max Tests: 12 Mean Tests: 5.357143 . MAD: 0.4750161

.[+++].[+].[+++].[++++].[+++].[++-].[+++].[+++].[+++].[+++]100 Tested: 168 Avg. Selected: 3.73 Min Tests: 1 Max Tests: 13 Mean Tests: 5.952381 . MAD: 0.4746718

.[+-].[+].[+++].[+++].[++].[++].[+++].[++-].[+++].[++]110 Tested: 168 Avg. Selected: 3.681818 Min Tests: 1 Max Tests: 14 Mean Tests: 6.547619 . MAD: 0.4751729

.[++].[++++].[++].[+++].[+++].[++-].[++++].[++].[++-].[+++]120 Tested: 168 Avg. Selected: 3.675 Min Tests: 1 Max Tests: 15 Mean Tests: 7.142857 . MAD: 0.4750718

.[+++].[++++].[+++].[+++].[+++].[++].[++].[+++].[++].[+++]130 Tested: 168 Avg. Selected: 3.684615 Min Tests: 2 Max Tests: 15 Mean Tests: 7.738095 . MAD: 0.4748304

.[++].[+++].[+++].[+++].[+++].[++].[++].[++-].[+++].[+++]140 Tested: 168 Avg. Selected: 3.678571 Min Tests: 2 Max Tests: 17 Mean Tests: 8.333333 . MAD: 0.4746694

.[+++].[++-].[+++].[+++].[+++].[++].[+++].[++].[+++].[+++]150 Tested: 168 Avg. Selected: 3.68 Min Tests: 3 Max Tests: 18 Mean Tests: 8.928571 . MAD: 0.4751227

.[++].[++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[++]160 Tested: 168 Avg. Selected: 3.675 Min Tests: 3 Max Tests: 19 Mean Tests: 9.52381 . MAD: 0.4750523

.[+++-].[+++-].[+++].[++++].[+++].[+++].[++++].[+++].[++-].[++-]170 Tested: 168 Avg. Selected: 3.694118 Min Tests: 4 Max Tests: 20 Mean Tests: 10.11905 . MAD: 0.4749435

.[++].[+++].[++].[++].[++].[++].[++].[+++].[++].[+++]180 Tested: 168 Avg. Selected: 3.672222 Min Tests: 4 Max Tests: 22 Mean Tests: 10.71429 . MAD: 0.474824

.[+++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[++]190 Tested: 168 Avg. Selected: 3.678947 Min Tests: 4 Max Tests: 23 Mean Tests: 11.30952 . MAD: 0.4750101

.[+++].[+++].[++].[+++].[+++].[+++].[+++].[++].[+++].[+++]200 Tested: 168 Avg. Selected: 3.685 Min Tests: 4 Max Tests: 27 Mean Tests: 11.90476 . MAD: 0.4751702

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atProb=c(0.90),
                     timetoEvent=times,
                     title="Test: Lung Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.808 0.607 0.472 0.449 0.507
RR 1.186 2.958 3.299 1.000 2.784
SEN 0.198 0.959 0.975 1.000 0.959
SPE 0.894 0.298 0.213 0.000 0.277
BACC 0.546 0.628 0.594 0.500 0.618
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.44 1.2 1.72 0.000126
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.05 1.05 1.01 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.961 0.961 0.95 0.971
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.609 0.61 0.546 0.668
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.62 0.522 0.718
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.198 0.131 0.281
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.807
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.19 0.972 1.45
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.772401 on 1 degrees of freedom, p = 0.095902
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 139 97 103.4 0.397 2.77
class=1 29 24 17.6 2.334 2.77

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.85 1 755
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Lung",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.808 0.607 0.472 0.449 0.507
RR 1.186 2.958 3.299 1.000 2.784
SEN 0.198 0.959 0.975 1.000 0.959
SPE 0.894 0.298 0.213 0.000 0.277
BACC 0.546 0.628 0.594 0.500 0.618
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 9.67e-05
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.02 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.961 0.96 0.952 0.969
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.609 0.609 0.551 0.671
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.62 0.522 0.718
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.198 0.131 0.281
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.807
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.19 0.972 1.45
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.772401 on 1 degrees of freedom, p = 0.095902
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 139 97 103.4 0.397 2.77
class=1 29 24 17.6 2.334 2.77